import time
import argparse
import hashlib
import json
import logging
import os
import re
import math
import random
from tqdm import tqdm
from concurrent.futures import ThreadPoolExecutor
from openai import OpenAI
import tiktoken

# Module-level tokenizer; truncate_by_tokens uses it to encode and decode text.
enc = tiktoken.get_encoding("cl100k_base")


# NOTE(review): not referenced anywhere in the visible part of this file --
# presumably meant as the token budget for truncate_by_tokens; confirm or remove.
MAX_INPUT_TOKENS = 30000

def truncate_by_tokens(text, max_tokens):
    """Return ``text`` limited to at most ``max_tokens`` tokens.

    Tokens are counted with the module-level ``enc`` tokenizer.

    Args:
        text: The string to truncate.
        max_tokens: Maximum number of tokens to keep.

    Returns:
        ``text`` itself (not a re-decoded copy) when it already fits, otherwise
        the decoding of its first ``max_tokens`` tokens.
    """
    # disallowed_special=() makes tiktoken encode special-token literals such as
    # "<|endoftext|>" as ordinary text. With the default setting encode() raises
    # ValueError on them, and model-generated text can contain such literals.
    tokens = enc.encode(text, disallowed_special=())
    if len(tokens) <= max_tokens:
        return text
    return enc.decode(tokens[:max_tokens])

def read_json(file_path):
    """Parse the UTF-8 encoded JSON file at ``file_path`` and return its content."""
    with open(file_path, mode='r', encoding='utf-8') as handle:
        return json.load(handle)

def write_json(data, file_path):
    """Serialize ``data`` as JSON into ``file_path``, replacing any existing file.

    The output is UTF-8 encoded, indented by two spaces, and keeps non-ASCII
    characters literal instead of escaping them.
    """
    with open(file_path, mode='w', encoding='utf-8') as handle:
        json.dump(
            data,
            handle,
            indent=2,
            ensure_ascii=False,
        )


def critic_question(prompt):
    """Send ``prompt`` to the critic model and return its reply text.

    The request is a single-turn chat completion (one user message) issued
    through the module-level ``client`` for ``MODEL_NAME`` at temperature 0
    with a 10000-token completion limit.

    Args:
        prompt: Full text of the user message.

    Returns:
        The first choice's message content with surrounding whitespace
        stripped, or an empty string when the response carries no content.

    Raises:
        Any exception raised by the client call is propagated unchanged.
    """
    response = client.chat.completions.create(
        model=MODEL_NAME,
        messages=[{'role': 'user', 'content': prompt}],
        temperature=0.0,
        max_tokens=10000,
    )
    # The chat-completions schema allows message.content to be null. Calling
    # .strip() on None would raise AttributeError, and inside executor.map one
    # such failure aborts the whole batch.
    content = response.choices[0].message.content
    return content.strip() if content is not None else ""


# Placeholder key handed to the OpenAI client below.
# NOTE(review): presumably the local endpoint does not validate keys -- confirm.
OPENAI_API_KEY = 'FAKE_API_KEY'

# Model identifier sent with every request made by critic_question.
# NOTE(review): this is an empty string -- confirm the server accepts it or fill it in.
MODEL_NAME = ""
# Base URL of the OpenAI-compatible API the client talks to (localhost, port 8000).
api_endpoint="http://127.0.0.1:8000/v1/"

# Shared client instance; critic_question issues all chat-completion calls through it.
client = OpenAI(base_url=api_endpoint, api_key=OPENAI_API_KEY) 

# Prompt template for the critic request. The {Question} and {Solution}
# placeholders are filled via str.format when the prompts are built; the leading
# indentation of every line is part of the string that gets sent.
ANALYSIS_PROMPT_TEMPLATE = """
            You are an expert in Operations Research (OR). You will be given an optimization problem and (optionally) a step-by-step solution, which may or may not include code.

            Task:
            Review the solution. Analyze each applicable part in order. Be concise — only highlight critical errors or omissions. Skip any section if the input doesn't contain it (e.g., no code → skip Code Analysis).

            Evaluate in this order:

            1. Variable Definitions
            2. Objective Function & Constraints
            3. Code Implementation (if provided)
            4. Final Answer / Output

            Question:
            {Question}

            Solution Steps:
            {Solution}

            Output Format (be brief and precise):

            1. Variable Definition Analysis
            - Intent: [e.g., Define decision variables]
            - Analysis: [Only note missing, redundant, or misdefined variables]
            - Judgement: [Correct/Incorrect]

            2. Objective & Constraint Analysis
            - Intent: [e.g., Formulate model]
            - Objective: [Correct? Brief reason if wrong]
            - Constraints: [Missing/incorrect? List only key issues]
            - Judgement: [Correct/Incorrect]

            3. Code Analysis (Skip if no code)
            - Intent: Implement model in Pyomo/Python
            - Analysis: [Only flag mismatches: missing vars/constraints, wrong indexing, type errors]
            - Judgement: [Correct/Incorrect or Skipped]

            4. Final Answer Analysis
            - Intent: [e.g., Report solution or error]
            - Analysis: [Plausible? Error meaningful? Root cause if wrong]
            - Judgement: [Correct/Incorrect]

            Corrected Step (Only if any part above is Incorrect)
            - [Rewrite only the first incorrect section — e.g., fix constraints or variables — in full, clearly labeled.]
            """

# Datasets to process; each name is used as a sub-directory of both the input
# directory and the output root.
dataset_names = ['Mamo_complex_lp']
# Root directory of the per-dataset result files; an empty string makes the
# input paths relative to the current working directory.
input_dir = ''

parser = argparse.ArgumentParser()
parser.add_argument('--model_path', type=str, required=True, help='Path to the LLM model directory')
args = parser.parse_args()
# The raw --model_path value becomes the prefix of the input and output file names.
# NOTE(review): a value containing path separators turns that prefix into a nested
# (or, if absolute, a replacing) path inside os.path.join -- confirm callers pass a
# bare model name.
model_name = args.model_path

# The root logger defaults to WARNING, which would drop every logging.info() call
# below. basicConfig() is a no-op when the root logger already has handlers.
logging.basicConfig(level=logging.INFO)

# For each dataset: load the stored results, ask the critic model to review every
# candidate output of every sample, and write the data back out with the reviews
# attached to each sample under the 'critics' key.
for dataset_name in tqdm(dataset_names, desc="All Datasets"):
    input_file = os.path.join(input_dir, dataset_name, f"{model_name}_results.json")

    output_dir = f"results_critic_half_one_zero/{dataset_name}"
    os.makedirs(output_dir, exist_ok=True)
    output_file = os.path.join(output_dir, f"{model_name}_critic_results.json")

    # An existing output file marks the dataset as already processed.
    if os.path.exists(output_file):
        logging.info(f"Output file for {dataset_name} already exists, skipping...")
        continue

    # Logged only for datasets that are actually processed (not skipped above).
    logging.info(f"Processing dataset: {dataset_name}")
    data = read_json(input_file)

    # All prompts are flattened into one list so a single thread pool can work
    # through them; sample_prompt_indices remembers how many prompts each sample
    # contributed so the flat results can be regrouped afterwards.
    all_prompts = []
    sample_prompt_indices = []

    for sample_idx, d in enumerate(data):
        sample_prompts = []
        for output in d['outputs']:
            try:
                output_final = "\n\n".join(output)
                prompt = ANALYSIS_PROMPT_TEMPLATE.format(Question=d['question'], Solution=output_final)
                # NOTE(review): this keeps the first 20000 characters, so for long
                # solutions the "Output Format" tail of the template is cut off.
                # truncate_by_tokens exists in this file but is not used here --
                # confirm which truncation is intended.
                prompt = prompt[:20000]
            except Exception:
                # Best effort: keep the prompt count aligned with the outputs by
                # sending a blank prompt, but record why the real one failed.
                logging.warning(
                    "Could not build critic prompt for sample %d of %s; using a blank prompt",
                    sample_idx,
                    dataset_name,
                    exc_info=True,
                )
                prompt = " "
            sample_prompts.append(prompt)
        all_prompts.extend(sample_prompts)
        sample_prompt_indices.append(len(sample_prompts))

    total_prompts = len(all_prompts)
    logging.info(f"Total prompts to process: {total_prompts}")

    # executor.map yields results in input order, which the regrouping below relies on.
    with ThreadPoolExecutor(max_workers=100) as executor:
        all_critics = list(tqdm(
            executor.map(critic_question, all_prompts),
            total=total_prompts,
            desc=f"Global Critic Processing ({dataset_name})",
            leave=False
        ))

    # Hand each sample the slice of critiques that belongs to its own outputs.
    idx = 0
    for d, num_prompts_for_this_sample in zip(data, sample_prompt_indices):
        d['critics'] = all_critics[idx:idx + num_prompts_for_this_sample]
        idx += num_prompts_for_this_sample

    write_json(data, output_file)
    logging.info(f"Processing for dataset '{dataset_name}' complete. Results saved to {output_file}")



















